*-----------------------------------------
*Part I: Decomposition (Labor)
*------------------------------------------

* Load the firm panel from the replication folder; the time variable t is
* copied into year so that the original t stays untouched.
cd "D:\Nanjing\2020\wage gap\Replication"
use "labor-basic-tpu-reg-1.dta", clear
generate year = t

* Discard firm-years that report n equal to zero (missing n is kept).
drop if n == 0

* This decomposition compares the firm-level gap in 1998 with the one in 2007:
* keep the two endpoint years and recode them as period 1 (1998) and 2 (2007).
keep if inlist(year, 1998, 2007)
generate indc_y = cond(year == 1998, 1, 2)
replace year = indc_y

* Replace the original firm identifier by a consecutive numeric id.
egen indc_f = group(newid)
generate party_id = indc_f
drop newid

* Keep a single observation per firm and period. Stata requires -force-
* whenever -duplicates drop- is given a variable list.
duplicates drop party_id year, force
/*
Disabled alternative: remove every firm-period that appears more than once.
duplicates tag party_id year, gen(indc_dum)
drop if indc_dum>0
*/


* 1. Firm-level efficiency gap in each period, in logs and in levels

generate lnabsG_gdp = log(absG_gdp)
generate Gap_f = absG_gdp


* 2. Split firms into three groups:
* (1) continuing firms;
* (2) exiting firms;
* (3) entrants.
* A firm counts as present in a period when its n is neither missing nor zero.

* Rectangularise the panel: after -tsfill, full- every firm has a row in both
* periods; rows created by the fill carry missing values.
tsset party_id year
tsfill, full

* Presence flags: 1 on the row of the period in which the firm is present.
generate dummy1 = 1 if year == 1 & n != 0 & n != .
generate dummy2 = 1 if year == 2 & n != 0 & n != .

* Spread each flag to all rows of the same firm.
bysort party_id: egen max_d1 = max(dummy1)
by party_id: egen max_d2 = max(dummy2)
replace dummy1 = max_d1 if missing(dummy1)
replace dummy2 = max_d2 if missing(dummy2)

* status = 1: continuing firm (present in both periods)
* status = 2: exit            (present in period 1 only)
* status = 3: entrant         (present in period 2 only)
* A firm that is present in neither period keeps a missing status.
generate status = 1 if dummy1 == 1 & dummy2 == 1
replace status = 2 if dummy1 == 1 & missing(dummy2)
replace status = 3 if missing(dummy1) & dummy2 == 1

* 3. Share of firm i in the period total of n
* Upper bound: 95th percentile of the gap within each period-status cell.
bysort year status: egen ub_gap = pctile(Gap_f), p(95)
* Trim entrants above that bound. Stata ranks a missing value above every
* number, so an entrant row with missing Gap_f and a non-missing bound is
* removed as well.
drop if status == 3 & Gap_f > ub_gap

bysort year: egen ann_labor = total(n)
generate share = n / ann_labor   /* share of the firm in the period total */

*****************************************************************************

* 4. Average efficiency gap in period 1 and period 2
* Share-weighted sum of firm gaps; the value is constant within a period.
by year: egen ave_gap = total(share * Gap_f)

/*
* The aggregate pollution emission change between pre-WTO and post-WTO.
(1) delta_ave_so2_int=21.06-25.21 (year2-year1)
(2) delta_ave_dust_int=11.11-12.13
(3) delta_ave_gas_int=2.79-2.84
(4) delta_ave_water_int=29.22-34.95

*/

*************************************************************************************************************

* 5. Decomposition
*----------------------------------------------------------------------------------------
* 5.1 Group-level gap for the three groups: continuing, exit, entrant
*----------------------------------------------------------------------------------------
* Step 1: within each period-status cell compute the total of n, every firm's
* share of that total and the share-weighted gap. The cont_ prefix is used for
* all cells at this point; only the continuing-firm values are kept in step 3.
bysort year status: egen cont_labor = total(n)
generate cont_share = n / cont_labor   /* share of the firm within its period-status cell */
by year status: egen cont_gap = total(cont_share * Gap_f)

* Step 2: copy the cell values into group-specific variables
* for exiting firms (status 2) ...
generate exit_gap    = cont_gap   if status == 2
generate exit_share  = cont_share if status == 2

* ... and for entrants (status 3).
generate entry_gap   = cont_gap   if status == 3
generate entry_share = cont_share if status == 3

*----------------------------------------------------------------------------------------------------
* Step 3: blank out the values of the other groups and broadcast the
* continuing-firm value to every row of the same period, so that cont_gap and
* cont_labor become period-level quantities for the continuing group.
foreach v of varlist cont_gap cont_labor {
    tempvar period_max
    replace `v' = . if status != 1
    bysort year: egen `period_max' = max(`v')
    replace `v' = `period_max'
    drop `period_max'
}

* Step 4: share of the continuing group in the period total of n
generate s1 = cont_labor / ann_labor if year == 1   /* period 1 */
generate s2 = cont_labor / ann_labor if year == 2   /* period 2 */




* Collapse to one row per status-period cell and declare status as the panel
* variable, so that the lag operator returns the period-1 value of the cell.
duplicates drop status year, force
tsset status year
bysort status (year): generate cont_gap_lag = L.cont_gap

*****************************************************************************
* Results for the labor-based decomposition
* 1. Decomposition among a. continuing; b. new entrants; c. exiting firms
*****************************************************************************

* Within term: change of the continuing-firm gap between the two periods.
generate delta_cont_gap = cont_gap - cont_gap_lag
* Entry term: (1 - continuing share in period 2) times the difference between
* the entrant gap and the continuing-firm gap.
generate delta_entry_gap = (1 - s2) * (entry_gap - cont_gap)
* Exit term: (1 - continuing share in period 1) times the difference between
* the continuing-firm gap and the exiter gap.
generate delta_exit_gap = (1 - s1) * (cont_gap - exit_gap)

*--------------------------------------------------------
*(1) Average gap in each period (Column 1 of Table 2):
summarize ave_gap if year == 1
summarize ave_gap if year == 2
*---------------------------------------------------------
*(2) Total change (Column 2 of Table 2) - disabled, hard-coded numbers:
*gen total_change=-6.94-0.45+2.91
*sum total_change if year==1

*--------------------------------------------------------------
*(3) change caused by group 1: continuing (Column 3 of Table 2)
summarize delta_cont_gap
*-----------------------------------------------------------
* (4) change caused by group 2: entrants (Column 6 of Table 2)
summarize delta_entry_gap
*-----------------------------------------------------------
* (5) change caused by group 3: exits (Column 7 of Table 2)
summarize delta_exit_gap

*************************************split here**************************************










***************************************************************

***************************************************************

***************************************************************

*-----------------------------------------
*Part II: Decomposition (Capital)
*------------------------------------------

* Load the firm panel from the replication folder; the time variable t is
* copied into year so that the original t stays untouched.
cd "D:\Nanjing\2020\wage gap\Replication"
use "labor-basic-tpu-reg-1.dta", clear
generate year = t

* Discard firm-years that report n equal to zero. This test runs on the
* original n, before it is overwritten below.
drop if n == 0

* From here on n holds k (presumably the capital stock, given the section
* title), so every total and share computed from n in this section is k-based.
replace n = k

* This decomposition compares the firm-level gap in 1998 with the one in 2007:
* keep the two endpoint years and recode them as period 1 (1998) and 2 (2007).
keep if inlist(year, 1998, 2007)
generate indc_y = cond(year == 1998, 1, 2)
replace year = indc_y

* Replace the original firm identifier by a consecutive numeric id.
egen indc_f = group(newid)
generate party_id = indc_f
drop newid

* Keep a single observation per firm and period. Stata requires -force-
* whenever -duplicates drop- is given a variable list.
duplicates drop party_id year, force
/*
Disabled alternative: remove every firm-period that appears more than once.
duplicates tag party_id year, gen(indc_dum)
drop if indc_dum>0
*/


* 1. Firm-level efficiency gap in each period, in logs and in levels

generate lnabsG_gdp = log(absG_gdp)
generate Gap_f = absG_gdp


* 2. Split firms into three groups:
* (1) continuing firms;
* (2) exiting firms;
* (3) entrants.
* A firm counts as present in a period when its n is neither missing nor zero.

* Rectangularise the panel: after -tsfill, full- every firm has a row in both
* periods; rows created by the fill carry missing values.
tsset party_id year
tsfill, full

* Presence flags: 1 on the row of the period in which the firm is present.
generate dummy1 = 1 if year == 1 & n != 0 & n != .
generate dummy2 = 1 if year == 2 & n != 0 & n != .

* Spread each flag to all rows of the same firm.
bysort party_id: egen max_d1 = max(dummy1)
by party_id: egen max_d2 = max(dummy2)
replace dummy1 = max_d1 if missing(dummy1)
replace dummy2 = max_d2 if missing(dummy2)

* status = 1: continuing firm (present in both periods)
* status = 2: exit            (present in period 1 only)
* status = 3: entrant         (present in period 2 only)
* A firm that is present in neither period keeps a missing status.
generate status = 1 if dummy1 == 1 & dummy2 == 1
replace status = 2 if dummy1 == 1 & missing(dummy2)
replace status = 3 if missing(dummy1) & dummy2 == 1

* 3. Share of firm i in the period total of n
* Upper bound: 95th percentile of the gap within each period-status cell.
* The bound is computed but the trimming step is disabled in this section.
bysort year status: egen ub_gap = pctile(Gap_f), p(95)
*drop if Gap_f>ub_gap &(status==2)

bysort year: egen ann_labor = total(n)
generate share = n / ann_labor   /* share of the firm in the period total */

*****************************************************************************

* 4. Average efficiency gap in period 1 and period 2
* Share-weighted sum of firm gaps; the value is constant within a period.
by year: egen ave_gap = total(share * Gap_f)

/*
* The aggregate pollution emission change between pre-WTO and post-WTO.
(1) delta_ave_so2_int=21.06-25.21 (year2-year1)
(2) delta_ave_dust_int=11.11-12.13
(3) delta_ave_gas_int=2.79-2.84
(4) delta_ave_water_int=29.22-34.95

*/

*************************************************************************************************************

* 5. Decomposition
*----------------------------------------------------------------------------------------
* 5.1 Group-level gap for the three groups: continuing, exit, entrant
*----------------------------------------------------------------------------------------
* Step 1: within each period-status cell compute the total of n, every firm's
* share of that total and the share-weighted gap. The cont_ prefix is used for
* all cells at this point; only the continuing-firm values are kept in step 3.
bysort year status: egen cont_labor = total(n)
generate cont_share = n / cont_labor   /* share of the firm within its period-status cell */
by year status: egen cont_gap = total(cont_share * Gap_f)

* Step 2: copy the cell values into group-specific variables
* for exiting firms (status 2) ...
generate exit_gap    = cont_gap   if status == 2
generate exit_share  = cont_share if status == 2

* ... and for entrants (status 3).
generate entry_gap   = cont_gap   if status == 3
generate entry_share = cont_share if status == 3

*----------------------------------------------------------------------------------------------------
* Step 3: blank out the values of the other groups and broadcast the
* continuing-firm value to every row of the same period, so that cont_gap and
* cont_labor become period-level quantities for the continuing group.
foreach v of varlist cont_gap cont_labor {
    tempvar period_max
    replace `v' = . if status != 1
    bysort year: egen `period_max' = max(`v')
    replace `v' = `period_max'
    drop `period_max'
}

* Step 4: share of the continuing group in the period total of n
generate s1 = cont_labor / ann_labor if year == 1   /* period 1 */
generate s2 = cont_labor / ann_labor if year == 2   /* period 2 */




* Collapse to one row per status-period cell and declare status as the panel
* variable, so that l.cont_gap returns the period-1 value of the same cell.
duplicates drop status year, force
tsset status year
sort status year
by status: gen cont_gap_lag=l.cont_gap

*****************************************************************************
* Results for the capital-based decomposition (n was replaced by k above)
* 1. Decomposition among a. continuing; b. new entrants; c. exiting firms
*****************************************************************************

* Within term: change of the continuing-firm gap between the two periods.
gen delta_cont_gap=cont_gap-cont_gap_lag
* Entry term: (1 - continuing share in period 2) times
* (entrant gap - continuing-firm gap).
gen delta_entry_gap=(1-s2)*(entry_gap-cont_gap)
* Exit term: (1 - continuing share in period 1) times
* (continuing-firm gap - exiter gap).
* FIX: the difference was written as (exit_gap-cont_gap), i.e. with the
* opposite sign to the labor-based section of this file. With the sign used
* here the three terms add up to ave_gap in period 2 minus ave_gap in period 1:
*   (C2 - C1) + (1-s2)*(E2 - C2) + (1-s1)*(C1 - X1).
gen delta_exit_gap=(1-s1)*(cont_gap-exit_gap)

*--------------------------------------------------------
*(1) Average gap in each period (Column 1 of Table 2):
sum ave_gap if year==1
sum ave_gap if year==2
*---------------------------------------------------------
*(2) Total change (Column 2 of Table 2) - disabled, hard-coded numbers:
*gen total_change=-6.94-0.45+2.91
*sum total_change if year==1

*--------------------------------------------------------------
*(3) change caused by group 1: continuing (Column 3 of Table 2)
sum delta_cont_gap
*-----------------------------------------------------------
* (4) change caused by group 2: entrants (Column 6 of Table 2)
sum delta_entry_gap
*-----------------------------------------------------------
* (5) change caused by group 3: exits (Column 7 of Table 2)
sum delta_exit_gap

*************************************split here**************************************









*************************************split here**************************************
*************************************split here**************************************
*************************************split here**************************************

*-----------------------------------------
*Part III: Decomposition (Labor and Export)
*------------------------------------------

* Load the firm panel from the replication folder; the time variable t is
* copied into year so that the original t stays untouched.
cd "D:\Nanjing\2020\wage gap\Replication"
use "labor-basic-tpu-reg-1.dta", clear
generate year = t

* Discard firm-years that report n equal to zero (missing n is kept).
drop if n == 0

* This decomposition compares the firm-level gap in 2000 with the one in 2007:
* keep the two endpoint years and recode them as period 1 (2000) and 2 (2007).
keep if inlist(year, 2000, 2007)
generate indc_y = cond(year == 2000, 1, 2)
replace year = indc_y

* Replace the original firm identifier by a consecutive numeric id.
egen indc_f = group(newid)
generate party_id = indc_f
drop newid

* Keep a single observation per firm and period. Stata requires -force-
* whenever -duplicates drop- is given a variable list.
duplicates drop party_id year, force
/*
Disabled alternative: remove every firm-period that appears more than once.
duplicates tag party_id year, gen(indc_dum)
drop if indc_dum>0
*/


* 1. Firm-level efficiency gap in each period, in logs and in levels

generate lnabsG_gdp = log(absG_gdp)
generate Gap_f = absG_gdp


* 2. Split firms into three groups according to their export status:
* (1) firms exporting in both periods;
* (2) all remaining firms;
* (3) firms that start exporting.

* Restrict the sample to firms observed in both periods: indc counts the rows
* of each firm and only firms with exactly two rows are kept.
capture drop ddd
generate ddd = 1
bysort party_id: egen indc = total(ddd)
keep if indc == 2


tsset party_id year
tsfill, full

* Export flags by period: dummy1/dummy2 mark a positive, non-missing d_export
* in period 1/2; dummy3/dummy4 mark a zero or missing d_export in period 1/2.
* dummy4 is not used again in this section.
generate dummy1 = 1 if year == 1 & (d_export > 0 & d_export != .)
generate dummy2 = 1 if year == 2 & (d_export > 0 & d_export != .)
generate dummy3 = 1 if year == 1 & (d_export == 0 | d_export == .)
generate dummy4 = 1 if year == 2 & (d_export == 0 | d_export == .)

* Spread the two exporter flags to all rows of the same firm.
bysort party_id: egen max_d1 = max(dummy1)
by party_id: egen max_d2 = max(dummy2)
replace dummy1 = max_d1 if missing(dummy1)
replace dummy2 = max_d2 if missing(dummy2)



* status = 1: exports in both periods ("continuing")
* status = 3: no export in period 1, exports in period 2 ("entrant")
* status = 2: every other firm ("exit" in the original labelling; this also
*             catches firms that export in neither period)
generate status = 1 if dummy1 == 1 & dummy2 == 1
* dummy3 is only set on the period-1 row, so status 3 is first assigned there
* and then spread to the firm's other row through the firm-level maximum.
replace status = 3 if dummy3 == 1 & dummy2 == 1
bysort party_id: egen status_new = max(status)
replace status = cond(inlist(status_new, 1, 3), status_new, 2)
drop status_new

* 3. Share of firm i in the period total of n
* Upper bound: 95th percentile of the gap within each period-status cell.
* The bound is computed but the trimming step is disabled in this section.
bysort year status: egen ub_gap = pctile(Gap_f), p(95)
*drop if Gap_f>ub_gap &status==3

bysort year: egen ann_labor = total(n)
generate share = n / ann_labor   /* share of the firm in the period total */

*****************************************************************************

* 4. Average efficiency gap in period 1 and period 2
* Share-weighted sum of firm gaps; the value is constant within a period.
by year: egen ave_gap = total(share * Gap_f)

/*
* The aggregate pollution emission change between pre-WTO and post-WTO.
(1) delta_ave_so2_int=21.06-25.21 (year2-year1)
(2) delta_ave_dust_int=11.11-12.13
(3) delta_ave_gas_int=2.79-2.84
(4) delta_ave_water_int=29.22-34.95

*/

*************************************************************************************************************

* 5. Decomposition
*----------------------------------------------------------------------------------------
* 5.1 Group-level gap for the three status groups
*----------------------------------------------------------------------------------------
* Step 1: totals of n by period-status cell. cont_labor is computed for every
* cell (and reduced to status 1 in step 3); entry_labor and exit_labor are
* only filled for status 3 and status 2 rows respectively.
bysort year status: egen cont_labor  = total(n)
by year status:     egen entry_labor = total(n) if status == 3
by year status:     egen exit_labor  = total(n) if status == 2

* Step 2: share of each firm in its group total and the share-weighted gap of
* the group. For entry and exit the share is missing outside the own group,
* and -egen total()- turns an all-missing cell into 0, so entry_gap and
* exit_gap equal 0 in the cells of the other groups.
foreach g in cont entry exit {
    generate `g'_share = n / `g'_labor
    by year status: egen `g'_gap = total(`g'_share * Gap_f)
}

/*
Disabled earlier version:
* for exit
gen exit_gap=cont_gap              if status==2
gen exit_share=cont_share          if status==2

* for entrants
gen entry_gap=cont_gap              if status==3
gen entry_share=cont_share          if status==3
*/
*----------------------------------------------------------------------------------------------------

* Step 3: keep the continuing-group (status 1) values of cont_gap and
* cont_labor only ...
replace cont_gap   = . if status != 1
replace cont_labor = . if status != 1

* ... and broadcast every group-level quantity to all rows of the period via
* the period maximum. For entry_gap and exit_gap this picks the own-group
* value as long as it is not below the 0 stored in the other cells.
foreach v of varlist cont_gap cont_labor entry_gap entry_labor exit_gap exit_labor {
    tempvar period_max
    bysort year: egen `period_max' = max(`v')
    replace `v' = `period_max'
    drop `period_max'
}

* Step 4: group shares in the period-2 total of n (missing in period 1)
generate s1 = entry_labor / ann_labor if year == 2   /* share of the status-3 group */
generate s2 = exit_labor / ann_labor if year == 2    /* share of the status-2 group */




* Collapse to one row per status-period cell and declare status as the panel
* variable, so that the l. operator returns the period-1 value of the cell.
duplicates drop status year, force
tsset status year
sort status year
by status: gen cont_gap_lag=l.cont_gap
by status: gen entry_gap_lag=l.entry_gap
by status: gen exit_gap_lag=l.exit_gap


* Give every row of a period the same lagged entrant gap.
bysort year: egen entry_gap_lag2=max(entry_gap_lag)
replace entry_gap_lag= entry_gap_lag2
drop  entry_gap_lag2
*****************************************************************************
* Results for the labor-based decomposition by export status
* 1. Decomposition among the three status groups (1, 3 and 2)
*****************************************************************************

*--------------------------------------------------------
*(1) Average gap in each period (Column 1 of Table 2):
* FIX: this summary used to run after "keep if year==2", where no period-1
* row is left, so it always reported zero observations. It now runs before
* the period-1 rows are dropped; the period-2 value is reported alongside it.
sum ave_gap if year==1
sum ave_gap if year==2

* Only the period-2 rows carry the lags and the shares s1 and s2.
keep if year==2
* Change of the status-1 gap between the two periods.
* NOTE(review): unlike the two terms below, this one is not multiplied by the
* group's share - confirm that this is intended.
gen delta_cont_gap=(cont_gap-cont_gap_lag)
* Change of the status-3 gap, weighted by that group's period-2 share.
gen delta_entry_gap=s1*(entry_gap-entry_gap_lag)
* Change of the status-2 gap, weighted by that group's period-2 share.
gen delta_exit_gap=s2*(exit_gap-exit_gap_lag)

*---------------------------------------------------------
*(2) Total change (Column 2 of Table 2) - disabled, hard-coded numbers:
*gen total_change=-6.94-0.45+2.91
*sum total_change if year==1

*--------------------------------------------------------------
*(3) change caused by group 1: continuing (Column 3 of Table 2)
sum delta_cont_gap
*-----------------------------------------------------------
* (4) change caused by group 2: entrants (Column 6 of Table 2)
sum delta_entry_gap
*-----------------------------------------------------------
* (5) change caused by group 3: exits (Column 7 of Table 2)
sum delta_exit_gap

*************************************split here**************************************








*************************************split here**************************************
*************************************split here**************************************
*************************************split here**************************************

*-----------------------------------------
*Part IV: Decomposition (Capital and Export)
*------------------------------------------

* Load the firm panel from the replication folder; the time variable t is
* copied into year so that the original t stays untouched.
cd "D:\Nanjing\2020\wage gap\Replication"
use "labor-basic-tpu-reg-1.dta", clear
generate year = t

* Discard firm-years that report n equal to zero. This test runs on the
* original n, before it is overwritten below.
drop if n == 0

* From here on n holds k (presumably the capital stock, given the section
* title), so every total and share computed from n in this section is k-based.
replace n = k

* This decomposition compares the firm-level gap in 2000 with the one in 2007:
* keep the two endpoint years and recode them as period 1 (2000) and 2 (2007).
keep if inlist(year, 2000, 2007)
generate indc_y = cond(year == 2000, 1, 2)
replace year = indc_y

* Replace the original firm identifier by a consecutive numeric id.
egen indc_f = group(newid)
generate party_id = indc_f
drop newid

* Keep a single observation per firm and period. Stata requires -force-
* whenever -duplicates drop- is given a variable list.
duplicates drop party_id year, force
/*
Disabled alternative: remove every firm-period that appears more than once.
duplicates tag party_id year, gen(indc_dum)
drop if indc_dum>0
*/


* 1. Firm-level efficiency gap in each period, in logs and in levels

generate lnabsG_gdp = log(absG_gdp)
generate Gap_f = absG_gdp


* 2. Split firms into three groups according to their export status:
* (1) firms exporting in both periods;
* (2) all remaining firms;
* (3) firms that start exporting.

* Restrict the sample to firms observed in both periods: indc counts the rows
* of each firm and only firms with exactly two rows are kept.
capture drop ddd
generate ddd = 1
bysort party_id: egen indc = total(ddd)
keep if indc == 2


tsset party_id year
tsfill, full

* Export flags by period: dummy1/dummy2 mark a positive, non-missing d_export
* in period 1/2; dummy3/dummy4 mark a zero or missing d_export in period 1/2.
* dummy4 is not used again in this section.
generate dummy1 = 1 if year == 1 & (d_export > 0 & d_export != .)
generate dummy2 = 1 if year == 2 & (d_export > 0 & d_export != .)
generate dummy3 = 1 if year == 1 & (d_export == 0 | d_export == .)
generate dummy4 = 1 if year == 2 & (d_export == 0 | d_export == .)

* Spread the two exporter flags to all rows of the same firm.
bysort party_id: egen max_d1 = max(dummy1)
by party_id: egen max_d2 = max(dummy2)
replace dummy1 = max_d1 if missing(dummy1)
replace dummy2 = max_d2 if missing(dummy2)



* status = 1: exports in both periods ("continuing")
* status = 3: no export in period 1, exports in period 2 ("entrant")
* status = 2: every other firm ("exit" in the original labelling; this also
*             catches firms that export in neither period)
generate status = 1 if dummy1 == 1 & dummy2 == 1
* dummy3 is only set on the period-1 row, so status 3 is first assigned there
* and then spread to the firm's other row through the firm-level maximum.
replace status = 3 if dummy3 == 1 & dummy2 == 1
bysort party_id: egen status_new = max(status)
replace status = cond(inlist(status_new, 1, 3), status_new, 2)
drop status_new

* 3. Share of firm i in the period total of n
* Upper bound: 95th percentile of the gap within each period-status cell.
* The bound is computed but the trimming step is disabled in this section.
bysort year status: egen ub_gap = pctile(Gap_f), p(95)
*drop if Gap_f>ub_gap &status==3

bysort year: egen ann_labor = total(n)
generate share = n / ann_labor   /* share of the firm in the period total */

*****************************************************************************

* 4. Average efficiency gap in period 1 and period 2
* Share-weighted sum of firm gaps; the value is constant within a period.
by year: egen ave_gap = total(share * Gap_f)

/*
* The aggregate pollution emission change between pre-WTO and post-WTO.
(1) delta_ave_so2_int=21.06-25.21 (year2-year1)
(2) delta_ave_dust_int=11.11-12.13
(3) delta_ave_gas_int=2.79-2.84
(4) delta_ave_water_int=29.22-34.95

*/

*************************************************************************************************************

* 5. Decomposition
*----------------------------------------------------------------------------------------
* 5.1 Group-level gap for the three status groups
*----------------------------------------------------------------------------------------
* Step 1: totals of n by period-status cell. cont_labor is computed for every
* cell (and reduced to status 1 in step 3); entry_labor and exit_labor are
* only filled for status 3 and status 2 rows respectively.
bysort year status: egen cont_labor  = total(n)
by year status:     egen entry_labor = total(n) if status == 3
by year status:     egen exit_labor  = total(n) if status == 2

* Step 2: share of each firm in its group total and the share-weighted gap of
* the group. For entry and exit the share is missing outside the own group,
* and -egen total()- turns an all-missing cell into 0, so entry_gap and
* exit_gap equal 0 in the cells of the other groups.
foreach g in cont entry exit {
    generate `g'_share = n / `g'_labor
    by year status: egen `g'_gap = total(`g'_share * Gap_f)
}

/*
Disabled earlier version:
* for exit
gen exit_gap=cont_gap              if status==2
gen exit_share=cont_share          if status==2

* for entrants
gen entry_gap=cont_gap              if status==3
gen entry_share=cont_share          if status==3
*/
*----------------------------------------------------------------------------------------------------

* Step 3: keep the continuing-group (status 1) values of cont_gap and
* cont_labor only ...
replace cont_gap   = . if status != 1
replace cont_labor = . if status != 1

* ... and broadcast every group-level quantity to all rows of the period via
* the period maximum. For entry_gap and exit_gap this picks the own-group
* value as long as it is not below the 0 stored in the other cells.
foreach v of varlist cont_gap cont_labor entry_gap entry_labor exit_gap exit_labor {
    tempvar period_max
    bysort year: egen `period_max' = max(`v')
    replace `v' = `period_max'
    drop `period_max'
}

* Step 4: group shares in the period-2 total of n (missing in period 1)
generate s1 = entry_labor / ann_labor if year == 2   /* share of the status-3 group */
generate s2 = exit_labor / ann_labor if year == 2    /* share of the status-2 group */




* Collapse to one row per status-period cell and declare status as the panel
* variable, so that the l. operator returns the period-1 value of the cell.
duplicates drop status year, force
tsset status year
sort status year
by status: gen cont_gap_lag=l.cont_gap
by status: gen entry_gap_lag=l.entry_gap
by status: gen exit_gap_lag=l.exit_gap


* Give every row of a period the same lagged entrant gap.
bysort year: egen entry_gap_lag2=max(entry_gap_lag)
replace entry_gap_lag= entry_gap_lag2
drop  entry_gap_lag2
*****************************************************************************
* Results for the capital-based decomposition by export status
* (n was replaced by k above)
* 1. Decomposition among the three status groups (1, 3 and 2)
*****************************************************************************

*--------------------------------------------------------
*(1) Average gap in each period (Column 1 of Table 2):
* FIX: this summary used to run after "keep if year==2", where no period-1
* row is left, so it always reported zero observations. It now runs before
* the period-1 rows are dropped; the period-2 value is reported alongside it.
sum ave_gap if year==1
sum ave_gap if year==2

* Only the period-2 rows carry the lags and the shares s1 and s2.
keep if year==2
* Change of the status-1 gap between the two periods.
* NOTE(review): unlike the two terms below, this one is not multiplied by the
* group's share - confirm that this is intended.
gen delta_cont_gap=(cont_gap-cont_gap_lag)
* Change of the status-3 gap, weighted by that group's period-2 share.
gen delta_entry_gap=s1*(entry_gap-entry_gap_lag)
* Change of the status-2 gap, weighted by that group's period-2 share.
gen delta_exit_gap=s2*(exit_gap-exit_gap_lag)

*---------------------------------------------------------
*(2) Total change (Column 2 of Table 2) - disabled, hard-coded numbers:
*gen total_change=-6.94-0.45+2.91
*sum total_change if year==1

*--------------------------------------------------------------
*(3) change caused by group 1: continuing (Column 3 of Table 2)
sum delta_cont_gap
*-----------------------------------------------------------
* (4) change caused by group 2: entrants (Column 6 of Table 2)
sum delta_entry_gap
*-----------------------------------------------------------
* (5) change caused by group 3: exits (Column 7 of Table 2)
sum delta_exit_gap

*************************************split here**************************************




